clear all;
close all;

%  =======================================================================================================================================================
% 
%  Code Description: 
%  This codefile builds the CSDB dataset from the raw data provided by the Bundesbank's  Research Data and Service Centre (RDSC)
% 
%  =======================================================================================================================================================
% 
% 
%  General disclaimer:
%  This file directory produces replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


projectPath = 'C:\ConnectedFunds_Codebase\'

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));

% load raw CSDB files
rawpath = strcat(projectPath, 'Data\CSDB\raw\')
fileNames = dir(strcat(rawpath, '*.parquet'));

load(strcat(projectPath, 'Data\IFS\mat\uwpkenn'), 'uwpkenn');
load(strcat(projectPath, 'Data\IFS\mat\ufund'), 'ufund');

uisin = unique([ufund; uwpkenn]);
clear ufund uwpkenn;

umonth_csdb = NaN(length(fileNames),1);

for t = 1:length(fileNames) 

    t,

    CSDB =  parquetread(strcat(rawpath,fileNames(t).name));
    
    [~,idxWhereRelevantISIN] = ismember(CSDB.ISIN,uisin);
    CSDB(idxWhereRelevantISIN==0,:) = [];
    
    date = fileNames(t).name;
    date = date(end-17:end-8);
    
    umonth_csdb(t) = str2double(date([1:4 6:7])); 
    
    save(strcat(projectPath, 'Data\CSDB\mat\CSDB_', date(1:4), date(6:7)), 'CSDB', 'date');
  
    clear CSDB i* date
end

save(strcat(projectPath, 'Data\CSDB\mat\umonth_csdb'), 'umonth_csdb');

%%%%%%%%% Now: calculate monthly security-level returns based on CSDB

clear all;
close all;

load(strcat(projectPath, 'Data\IFS\mat\uwpkenn'), 'uwpkenn');
load(strcat(projectPath, 'Data\IFS\mat\ufund'), 'ufund');
load(strcat(projectPath, 'Data\IFS\mat\umonth'), 'umonth');

uwpkenn_all = unique([ufund; uwpkenn]);
clear ufund uwpkenn;

%%%%%
Nisins = length(uwpkenn_all);

umonth = [200908; umonth];
umonth(umonth<200908) = [];
Tmonth  = length(umonth);

% ECB FX data (source: https://www.ecb.europa.eu/stats/eurofxref/eurofxref-hist.zip?27f35de1e7f843eea9706c5fbd29b006)
load(strcat(projectPath, 'Data\ECB_ExchangeRates\ECB_FX_Monthly.mat'))
ECB_FX_Monthly(ECB_FX_Monthly.DATUM<umonth(1) | ECB_FX_Monthly.DATUM>umonth(end),:) = [];


%%%%%%%%%
CSDB_Price           = NaN(Tmonth,Nisins);
CSDB_Price_EUR       = NaN(Tmonth,Nisins);
CSDB_AccInterest     = NaN(Tmonth,Nisins);
CSDB_AccInterest_EUR = NaN(Tmonth,Nisins);
CSDB_Coupon_EUR      = NaN(Tmonth,Nisins);
CSDB_Coupon          = NaN(Tmonth,Nisins);
CSDB_Dividend        = NaN(Tmonth,Nisins);
CSDB_Dividend_EUR    = NaN(Tmonth,Nisins);
CSDB_Volume          = NaN(Tmonth,Nisins);
CSDB_Volume_EUR      = NaN(Tmonth,Nisins);
CSDB_Yield           = NaN(Tmonth,Nisins);
%%%%%%%%%

for t = 1 : Tmonth
    
    t,
    umonth(t),
    
    %% step 1: load CSDB data    
    load(strcat(projectPath, 'Data\CSDB\mat\CSDB_',num2str(umonth(t))));
    

    %% step 2: keep only relevant entries
    CSDB = CSDB(~ismember(CSDB.PRICE_DT, {''}), :);
    CSDB(isempty(CSDB.PRICE_DT)==1,:) = [];
    CSDB = CSDB(~ismember(CSDB.PRICE_DT, {'NaT'}), :);
    CSDB(ismissing(CSDB.NOMINAL_CURRENCY),:) = [];
    CSDB(CSDB.PRICE <= 0,:) = [];
    
    [~,idxWhereISIN] = ismember(CSDB.ISIN,uwpkenn_all);
    CSDB(idxWhereISIN==0,:) = [];
    clear idxWhereISIN;
    
    CSDB.PRICE_DT_num = datenum(CSDB.PRICE_DT);
    
    date = datenum(CSDB.EXTRACTION_DT(1));
    
    % since we want to calculate monthly returns, we need relatively recent pricing information
    CSDB(date-CSDB.PRICE_DT_num>15,:) = [];
    

    %% step 3: calculate Euro exchange rates for all currencies (EUR/otherCurrency)
    
    CSDB.ExchangeRate = NaN(height(CSDB),1);
    
    % CSDB exchange rate
    ucurr = unique(CSDB.NOMINAL_CURRENCY);
    ucurr(ismissing(ucurr)==1) = [];
    
    [~,idxWhereCurr] = ismember(CSDB.NOMINAL_CURRENCY,ucurr);
    Curr = NaN(length(ucurr),1);
    x = (CSDB.AMOUNT_OUTST_EUR./CSDB.AMOUNT_OUTSTANDING);
    for i = 1:length(ucurr)
        Curr(i) = nanmedian(x(idxWhereCurr==i));
        if Curr(i) == 0
           Curr(i) = NaN;
        end
    end
    CSDB.ExchangeRate(idxWhereCurr>0) = Curr(idxWhereCurr(idxWhereCurr>0));
    
    clear x idxWhereCurr Curr;
    
    % ECB exchange rate
    [~,idxWhereCurr] = ismember(CSDB.NOMINAL_CURRENCY,ECB_FX_Monthly.Properties.VariableNames);
    CSDB.ExchangeRate(idxWhereCurr>0) = 1./table2array(ECB_FX_Monthly(t,idxWhereCurr(idxWhereCurr>0)));
    clear idxWhereCurr;
    
    % drop entries for which we have no exchange rates
    CSDB(isnan(CSDB.ExchangeRate),:) = [];
    

    %% step 4: calculate price in EUR (for bonds: this is the clean price)
    [~,idxWhereISIN] = ismember(CSDB.ISIN,uwpkenn_all);
    
    CSDB_Price(t,idxWhereISIN)     = CSDB.PRICE; 
    CSDB_Price_EUR(t,idxWhereISIN) = CSDB.PRICE.*CSDB.ExchangeRate;
    
    % accrued interest (end of month)
    CSDB_AccInterest(t,idxWhereISIN)     = CSDB.ACC_INTEREST;
    CSDB_AccInterest_EUR(t,idxWhereISIN) = CSDB.ACC_INTEREST.*CSDB.ExchangeRate;
    
    % coupon payments during the month
    ii = find(isnat(CSDB.COUPON_DT)==0);
    
    CSDB.COUPON_DT_num     = NaN(height(CSDB),1);
    CSDB.COUPON_DT_num(ii) = datenum(CSDB.COUPON_DT(ii));
    clear ii;
    
    % calculate coupon payment from annualised payments
    idx = ismember(CSDB.COUPON_FREQUENCY, {'' 'IR' 'UK' 'OT' 'ZC'});    % empty, iregularly, unknown, other, zero coupon
    CSDB.COUPON_RATE(idx) = NaN;
    CSDB.COUPON_RATE = 0.01.*CSDB.COUPON_RATE;
    clearvars idx
    idx = ismember(CSDB.COUPON_TYPE, {'MO' 'FN'});    % monthly, fortnightly
    CSDB.COUPON_RATE(idx) = CSDB.COUPON_RATE(idx) / 12;
    clearvars idx
    idx = ismember(CSDB.COUPON_TYPE, 'BM');    % bi-monthly
    CSDB.COUPON_RATE(idx) = CSDB.COUPON_RATE(idx) / 6;
    clearvars idx
    idx = ismember(CSDB.COUPON_TYPE, 'QU');    % quarterly
    CSDB.COUPON_RATE(idx) = CSDB.COUPON_RATE(idx) / 4;
    clearvars idx
    idx = ismember(CSDB.COUPON_TYPE, 'SA');    % semi-anually
    CSDB.COUPON_RATE(idx) = CSDB.COUPON_RATE(idx) / 2;
    clearvars idx
    
    % find bonds for which the coupon was paid during the previous month
    ii = find(date - CSDB.COUPON_DT_num <=25);
    CSDB_Coupon(t,idxWhereISIN(ii))     = (CSDB.COUPON_RATE(ii).*CSDB.REDEMP_PRICE(ii));
    CSDB_Coupon_EUR(t,idxWhereISIN(ii)) = (CSDB.COUPON_RATE(ii).*CSDB.REDEMP_PRICE(ii)).*CSDB.ExchangeRate(ii); 
    
    clear ii;

    % dividend payments for equities (during the month)
    ii = find(isnat(CSDB.DIV_DT)==0);
    CSDB.DIV_DT_num     = NaN(height(CSDB),1);
    CSDB.DIV_DT_num(ii) = datenum(CSDB.DIV_DT(ii));
    
    ii = find(date - CSDB.DIV_DT_num <=25); 
    CSDB_Dividend(t,idxWhereISIN(ii))     = CSDB.DIV_AMOUNT(ii);
    CSDB_Dividend_EUR(t,idxWhereISIN(ii)) = CSDB.DIV_AMOUNT(ii).*CSDB.ExchangeRate(ii);
    
    % Compute trading volume in EUR
    if umonth(t)>=201511
        CSDB_Volume(t,idxWhereISIN)     = CSDB.VOLUME_TRADED;
        CSDB_Volume_EUR(t,idxWhereISIN) = CSDB.VOLUME_TRADED.*CSDB.ExchangeRate;
    end
    
    CSDB_Yield(t,idxWhereISIN) = CSDB.YIELD;
    
    clear i* CSDB Curr k* ucurr u x xx ans date;

end

clear t ECB* ans

save(strcat(projectPath, 'Data\CSDB\CSDB_Returns'));
